# -*- coding: UTF-8 -*-
#!/usr/local/bin/python

# Split a large text file into many HTML files.

import re   
import os   

class BigText2Html:
    """Remember which source text file is to be turned into HTML.

    The class currently carries no behaviour beyond storing the value it is
    constructed with.
    """

    def __init__(self, src_file):
        """Keep *src_file* on the instance as ``self.src_file``.

        src_file -- the source text file (presumably its path; the value is
                    stored as given and never inspected here).
        """
        self.src_file = src_file
    
    

# Path of the plain-text book to convert.
source_path = 'f:\\佣兵天下.txt'
# (directory, file name) of the book.
path_pieces = os.path.split(source_path)
# Book title: the file name with everything from its first dot onwards removed.
novel_title = re.sub(r'\..*$', '', path_pieces[1])
# Output folder "<title>_html" placed next to the source file.  os.path.join
# supplies the path separator; plain string concatenation only produced a
# valid folder when the directory part already ended with one (a drive root).
target_path = os.path.join(path_pieces[0], '%s_html' % novel_title)
# Regex for a section title line: optional leading whitespace, "第…卷",
# whitespace, then the rest of the line.
# A stricter variant that also requires a "第…章" part, kept for reference:
#   r'第.+卷\s+.+\s+第.+章\s+.+'
section_re = re.compile(r'^\s*第.+卷\s+.*$')

# HTML emitted at the top of every generated page (section pages and the
# index page alike).  It is a %-format template with two %s placeholders:
# the first fills <title>, the second fills the <h2> heading.  The template
# ends right after the "go to bottom" link and <hr/>; the page body and the
# closing tags are appended by the caller.
# NOTE(review): the meta tag's content attribute holds only "gb2312"; a
# Content-Type value normally has the form "text/html; charset=..." --
# confirm the intended output encoding before changing it.
section_head = '''
<html>   
<head>   
<meta http-equiv="Content-Type" content="gb2312"/>   
<title>%s</title>   
</head>   
<body style="font-family:楷体,宋体;font-size:16px; margin:0; padding: 20px; background:#FAFAD2;color:#2B4B86;text-align:center;">   
<h2>%s</h2>
<a href="#bottom">去页尾</a><hr/>'''

# escape xml/html
def escape_xml(code):
    """Return *code* escaped so it can be embedded as HTML text.

    '&', '<' and '>' become their character entities, each tab becomes four
    '&nbsp;' and every other whitespace character (including a trailing
    newline) becomes a single '&nbsp;'.

    code -- the raw text to escape.
    """
    # '&' has to be escaped first: doing it after '<' / '>' would re-escape
    # the ampersands of the freshly inserted '&lt;' / '&gt;' entities.
    text = code.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    # Tabs before generic whitespace, so a tab is widened to four spaces
    # instead of being caught by the single-space rule below.
    text = text.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
    return re.sub(r'\s', '&nbsp;', text)

def _write_page(name, parts):
    """Write the strings in *parts* to the file '<name>.html' in target_path."""
    with open(os.path.join(target_path, '%s.html' % name), 'w') as page:
        page.writelines(parts)


def _section_footer(sec_count, has_next):
    """Return the navigation footer and closing tags for page *sec_count*.

    The "previous" link is left out on the first page (number 0) and the
    "next" link is left out when *has_next* is false (the last page).
    """
    parts = ['<hr/><p>']
    if sec_count > 0:
        parts.append('<a href="%d.html">上一篇</a>&nbsp;|&nbsp;' % (sec_count - 1))
    parts.append('<a href="index.html">目录</a>&nbsp;|&nbsp;')
    if has_next:
        parts.append('<a href="%d.html">下一篇</a>&nbsp;|&nbsp;' % (sec_count + 1))
    parts.append('<a name="bottom" href="#">回页首</a></p>')
    parts.append('</body></html>')
    return parts


# entry of the script
def main():
    """Split the book at source_path into numbered HTML pages plus an index.

    Page 0 is the preface: everything before the first line that matches
    section_re.  Each matching line starts a new page whose title is that
    line with whitespace runs collapsed to one space.  Blank lines are
    skipped; every other line becomes one left-aligned, escaped paragraph.
    Finally index.html is written with a link to every page.

    Reads the module-level source_path, target_path, novel_title,
    section_re and section_head; creates target_path if it does not exist.
    """
    # create the output folder
    if not os.path.exists(target_path):
        os.mkdir(target_path)

    sec_count = 0
    preface_title = '%s 前言' % novel_title
    # Content of the page being built; flushed to disk when the page ends.
    sec_cache = [section_head % (preface_title, preface_title)]
    # One <li> entry per page, for the index.
    idx_cache = ['<li><a href="%d.html">%s</a></li>' % (sec_count, novel_title)]

    with open(source_path, 'r') as source:
        for line in source:
            if line.strip() == '':
                continue
            # is a chapter's title?
            if section_re.match(line):
                title = re.sub(r'\s+', ' ', line)
                print('converting %s...' % title)
                # finish the current page; a next page is about to begin
                sec_cache.extend(_section_footer(sec_count, True))
                _write_page('%d' % sec_count, sec_cache)
                sec_count += 1

                # create a new section
                sec_cache = [section_head % (title, title)]
                idx_cache.append(
                    '<li><a href="%d.html">%s</a></li>' % (sec_count, title))
            else:
                sec_cache.append(
                    '<p style="text-align:left;">%s</p>' % escape_xml(line))

    # write rest lines: the last page has no successor
    sec_cache.extend(_section_footer(sec_count, False))
    _write_page('%d' % sec_count, sec_cache)

    # write the menu
    menu_head = '%s 目录' % novel_title
    menu = [section_head % (menu_head, menu_head), '<ul style="text-align:left">']
    menu.extend(idx_cache)
    menu.append('</ul></body></html>')
    _write_page('index', menu)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('completed. %d chapter(s) in total.' % sec_count)
    
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()